### Combine legisdata with Eurobarometer
## Annualize legisdata and standardize so that all left-right measures have a mean of zero
################################################
# Fix the RNG seed so that any random draws made after this point are
# reproducible across runs.
set.seed(89264)
library(xtable)
library(tidyverse)
library(data.table)
library(tidyverse);library(ggplot2)
library(ggthemes); library(ggExtra)
# Restore the saved workspace objects; the `legis` data used below is expected
# to come from this file.
load("all_years_legis.RData")
## Keep only rows whose year lies strictly between 1975 and 2024 (the EB range).
## Rows with a missing year are dropped as well, because filter() discards NA.
legis <- filter(legis, year > 1975 & year < 2024)

#' Smoothed log-ratio of "right" to "left" labels.
#'
#' Counts how many entries of `lr` equal "right" and how many equal "left",
#' adds 0.5 to each count (so an empty category never yields log(0)), and
#' returns log(right + 0.5) - log(left + 0.5). Positive values mean more
#' "right" than "left" labels; 0 means equal counts (including none at all).
#'
#' @param lr A vector of labels. Entries other than "left"/"right" are
#'   ignored, and so are missing values.
#' @return A single numeric value.
position <- function(lr) {
  # na.rm = TRUE: a missing label must not turn the whole score into NA; it is
  # skipped exactly like any other label that is neither "left" nor "right".
  n_left <- sum(lr == "left", na.rm = TRUE)
  n_right <- sum(lr == "right", na.rm = TRUE)
  log(n_right + 0.5) - log(n_left + 0.5)
}

# Mean-centering: store each measure's pooled (all-years) mean as a constant
# column, so the per-year step can subtract it inside a grouped pipeline.
legis$gen_mean <- with(legis, mean(CMPgen))
legis$econ_mean <- with(legis, mean(CMPecon))
legis$social_mean <- with(legis, mean(CMPsocial))
# The LLM measure has no numeric score; its pooled value is the left/right
# log-ratio computed by position() over all rows.
legis$lr_llm_mean <- with(legis, position(llm_class))
legis$keywords_mean <- with(legis, mean(keywords_score))
# Factor is the only measure averaged with missing values removed.
legis$Factor_mean <- with(legis, mean(Factor, na.rm = TRUE))

## Annual measures: one row per year, each measure expressed as a deviation
## from its pooled mean (the *_mean columns added to `legis` above).
annual_pos <- legis |>
  group_by(year) |>
  reframe(
    "year" = as.numeric(unique(year)),
    # number of rows of `legis` falling in this year
    "count" = n(),
    Keywords = mean(keywords_score - keywords_mean),
    CMPgen = mean(CMPgen - gen_mean),
    CMPecon = mean(CMPecon - econ_mean),
    CMPsocial = mean(CMPsocial - social_mean),
    # position() yields one value per year and lr_llm_mean is constant, so
    # mean() just collapses the recycled difference back to that one value.
    LLM = mean(position(llm_class) - lr_llm_mean),
    # na.rm = TRUE mirrors how Factor_mean itself is computed. Without it a
    # single missing Factor score makes the whole year NA, and the cumulative
    # mean taken afterwards would carry that NA into every later year.
    # A year with no observed Factor score at all still comes out as NaN.
    Factor = mean(Factor - Factor_mean, na.rm = TRUE)
  ) |>
  data.table()

# Running mean of every annual measure, taken down the rows of annual_pos in
# their current order; the new *_cum columns are added by reference.
annual_pos[
  ,
  c(
    "Keywords_cum", "LLM_cum", "CMPgen_cum",
    "CMPecon_cum", "CMPsocial_cum", "Factor_cum"
  ) := lapply(.SD, cummean),
  .SDcols = c("Keywords", "LLM", "CMPgen", "CMPecon", "CMPsocial", "Factor")
]

### Descriptive plots
# Long format: one row per year x measure. The numeric prefixes in the measure
# names fix the panel order, since facets are arranged alphabetically.
policy_scores <- pivot_longer(
  rename(
    annual_pos,
    `1. Keywords` = Keywords,
    `2. CMPgen` = CMPgen,
    `3. CMPecon` = CMPecon,
    `4. CMPsocial` = CMPsocial,
    `5. LLM` = LLM,
    `6. Factor` = Factor
  ),
  cols = c(
    `1. Keywords`, `5. LLM`,
    `2. CMPgen`, `3. CMPecon`, `4. CMPsocial`, `6. Factor`
  ),
  names_to = "measure",
  values_to = "score"
)

# One panel per measure: yearly points plus a smooth GAM trend (k = 3).
# Each panel gets its own y scale because the measures are on different scales.
ggplot(policy_scores, aes(x = year, y = score)) +
  geom_point() +
  geom_smooth(method = "gam", formula = y ~ s(x, k = 3, bs = "cs")) +
  facet_wrap(~measure, ncol = 3, scales = "free_y") +
  labs(x = "", y = "Left - Right") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1, size = 12),
    axis.text.y = element_text(angle = 60, hjust = 1, size = 12)
  )
# ggsave() without a `plot` argument writes the most recently built plot.
ggsave(
  filename = "figures/left_right_time.png",
  width = 7, height = 4, units = "in", dpi = 450
)

## Load Eurobarometer respondents and recode the analysis variables
eb_data <- haven::read_dta("EB1976_2023_All_v4.dta") |>
  select("YEAR":"WEIGHTS", "OLD15") |>
  # Keep respondents below the EDUC / left_right cut-offs and older than 14.
  # NOTE(review): 98+ and 96+ are presumably non-substantive codes (don't
  # know / refused) -- confirm against the codebook. Rows where any of these
  # variables is missing are dropped too, since filter() discards NA.
  filter(EDUC < 98, left_right < 96, AGE > 14) |>
  reframe(
    ms = as.character(haven::as_factor(NATION)),
    year = YEAR,
    weights = WEIGHTS,
    old15 = OLD15,
    # 1 when proeu equals 3, otherwise 0 (missing stays missing)
    proeu = ifelse(proeu == 3, 1, 0),
    left_right = left_right,
    # square of the raw left_right value as read from the file
    left_right_sq = left_right^2,
    # education: value labels first, then collapsed to two groups
    education = as.character(haven::as_factor(EDUC)),
    education = ifelse(
      education == "22 OR OLDER" | education == "STILL STUDYING",
      "University", "High School"
    ),
    # sex: value labels first, then recoded; any other label becomes NA.
    # NOTE(review): " Male" carries a leading space -- presumably to make it
    # sort first (reference level) once converted to a factor; confirm.
    sex = as.factor(haven::as_factor(SEX)),
    sex = case_when(
      sex == "MALE" ~ " Male",
      sex == "FEMALE" ~ "Female",
      .default = NA
    ),
    age = as.numeric(AGE),
    age_sq = (age^2) / 100,
    # 0 when identity equals 1, 1 when it is above 1, NA otherwise
    identity = case_when(
      identity == 1 ~ 0,
      identity > 1 ~ 1
    )
  )

# Keep the raw scale, then z-standardize left_right (mean 0, sd 1) over the
# pooled sample.
eb_data$left_right_orig <- eb_data$left_right
eb_data$left_right <-
  (eb_data$left_right_orig - mean(eb_data$left_right_orig)) /
  sd(eb_data$left_right_orig)
# NOTE(review): left_right_sq is deliberately left untouched here, so it is
# the square of the RAW left_right scale, not of the standardized one. A no-op
# self-assignment of left_right_sq used to sit at this point and was removed;
# if the squared term should be on the standardized scale, recompute it here.


# Attach the annual policy positions to every respondent (matched on year) and
# derive the policy-loss variables: the distance between a respondent's
# standardized left-right position and each annual policy measure.
hh_data <- eb_data |>
  dplyr::left_join(annual_pos, by = "year") |>
  mutate(
    # squared distance to the annual measures
    policy_loss_Keywords = (left_right - Keywords)^2,
    policy_loss_LLM = (left_right - LLM)^2,
    policy_loss_CMPgen = (left_right - CMPgen)^2,
    policy_loss_CMPecon = (left_right - CMPecon)^2,
    policy_loss_CMPsocial = (left_right - CMPsocial)^2,
    policy_loss_Factor = (left_right - Factor)^2,
    # squared distance to the cumulative (running-mean) measures
    policy_loss_Keywords_cum = (left_right - Keywords_cum)^2,
    policy_loss_LLM_cum = (left_right - LLM_cum)^2,
    policy_loss_CMPgen_cum = (left_right - CMPgen_cum)^2,
    policy_loss_CMPecon_cum = (left_right - CMPecon_cum)^2,
    policy_loss_CMPsocial_cum = (left_right - CMPsocial_cum)^2,
    policy_loss_Factor_cum = (left_right - Factor_cum)^2,
    # absolute distance to the annual measures
    policy_loss_Keywords_abs = abs(left_right - Keywords),
    policy_loss_LLM_abs = abs(left_right - LLM),
    policy_loss_CMPgen_abs = abs(left_right - CMPgen),
    policy_loss_CMPecon_abs = abs(left_right - CMPecon),
    policy_loss_CMPsocial_abs = abs(left_right - CMPsocial),
    policy_loss_Factor_abs = abs(left_right - Factor),
    # absolute distance to a cumulative measure exists for Factor only
    policy_loss_Factor_abs_cum = abs(left_right - Factor_cum),
    # identifier of the member-state x year cell
    country_year = as.factor(paste(ms, year, sep = "_")),
    education = as.factor(education),
    sex = as.factor(sex)
  ) |>
  # Factors are built before this filter, so their levels reflect all rows.
  # Per the original note, this drops 147 observations with weight 0.
  filter(weights > 0)

# Linear fit of EU support (0/1) on the Factor-based squared policy loss.
# na.omit() keeps only rows that are complete on EVERY column of hh_data, not
# just the two plotted here.
hh_data |>
  na.omit() |>
  ggplot(aes(x = policy_loss_Factor, y = proeu)) +
  geom_smooth(method = "lm") +
  theme_minimal() +
  xlab("Policy loss") +
  # axis label typo fixed ("Probablity" -> "Probability")
  ylab("Probability of supporting the EU") +
  theme(legend.title = element_blank())
# ggsave() without a `plot` argument writes the most recently built plot.
ggsave("figures/policy_loss_eu.png",
       width = 6, height = 2.65, units = "in", dpi = 450)
#### Descriptive statistics table (LaTeX)
# Display labels, listed in exactly the same order as the columns selected
# for the table below.
labs <- c(
  "EU Support",
  "Policy-loss (Keywords)",
  "Policy-loss (CMPgen)",
  "Policy-loss (CMPecon)",
  "Policy-loss (CMPsocial)",
  "Policy-loss (LLM)",
  "Policy-loss (Factor)",
  "Left-right",
  "Age",
  "Age squared",
  "Education",
  "Sex",
  "Identity"
)
vtable::st(
  select(
    hh_data,
    proeu,
    policy_loss_Keywords, policy_loss_CMPgen, policy_loss_CMPecon,
    policy_loss_CMPsocial, policy_loss_LLM, policy_loss_Factor,
    left_right, age, age_sq, education, sex, identity
  ),
  out = "latex",
  labels = labs,
  title = "Descriptive statistics",
  anchor = "tab:desc_summary"
)
### Dataset to be used in the analysis
save(hh_data, file = "hh_data.RData")
